In the following section, we check whether there is a relation between physico-chemical properties and the observed differences between the extraction methods.

The protein data sets are used here.

1 Load the data set

setwd("~/GitLab/20211202_extraction_method/")

## Read the LFQ intensities of one sheet of the overview workbook with
## maxQuant() and replace the assay of the returned object by its
## log-transformed values.
read_log_lfq <- function(sheet) {
    se <- maxQuant("proteinGroups_overview.xlsx", intensity = "LFQ", 
        sheet = sheet, type = "xlsx")
    log_assay <- transformAssay(assay(se), method = "log")
    MatrixQCvis:::updateSE(se, assay = log_assay)
}

## cells data set
cells <- read_log_lfq("cells")

## fresh-frozen data set
ff <- read_log_lfq("Fresh-frozen")

## FFPE data set
ffpe <- read_log_lfq("FFPE")

## combined serum/plasma data set (split by condition further below)
plasma_serum <- read_log_lfq("Serum-Plasma")

## annotation columns that are kept
column_keep <- c("Sample_IDs", "condition", "LFQ")

## read the sample annotation; reading starts at the second row of the sheet
annot <- openxlsx::read.xlsx("proteinGroups_overview.xlsx", 
    sheet = "annotation", startRow = 2)

## keep the relevant columns and drop the samples that are flagged as
## "ignore" or that have no condition
annot <- annot[, column_keep]
annot <- annot[!is.na(annot$condition) & !(annot$condition %in% "ignore"), ]

## annotation for Cells (selected by condition)
annot_cells <- annot[grepl("Cells", annot$condition), ]

## annotation for Fresh-frozen (selected by sample id)
annot_ff <- annot[grepl("powder_tissue_AFA_|_T", annot$Sample_IDs), ]

## annotation for FFPE (selected by sample id)
annot_ffpe <- annot[grepl("_FFPE", annot$Sample_IDs), ]

## annotation for Plasma and Serum (selected by condition)
annot_plasma <- annot[grepl("Plasma", annot$condition), ]
annot_serum <- annot[grepl("Serum", annot$condition), ]

Load the protein FASTA files and cut the sequence names.

library(Biostrings)
hs <- readAAStringSet("2021_03_30_Uniprot_homo_sapiens_canonical - Copy.fasta")
mm <- readAAStringSet("20190204_uniprot-mus+musculus-filtered-reviewed_yes.fasta")

## The FASTA headers are split at "|" and the second field is used as the
## feature id. The headers are read with the public names() accessor instead
## of the internal @ranges slot, and the sequences are converted with the
## vectorised as.character() instead of one lapply() call per sequence.

## for cells, ffpe, plasma, serum = human
names_hs <- strsplit(names(hs), split = "[|]")
names_hs <- vapply(names_hs, "[", character(1), 2, USE.NAMES = FALSE)
hs <- data.frame(feature = names_hs, sequence = unname(as.character(hs)))

## for FF = mouse
names_mm <- strsplit(names(mm), split = "[|]")
names_mm <- vapply(names_mm, "[", character(1), 2, USE.NAMES = FALSE)
mm <- data.frame(feature = names_mm, sequence = unname(as.character(mm)))

1.1 Update the colData and rowData slots with the annotations

Write the annotations and the sequences to the colData and rowData slots.

## Attach the sample annotation and the protein sequences to an object with
## colData/rowData slots.
##
## se:    object whose colData has a `name` column and whose rowData has a
##        `feature` column (";"-separated ids)
## annot: annotation table with a `Sample_IDs` column that matches
##        colnames(se)
## fasta: data.frame with the columns `feature` and `sequence`
##
## Returns `se` restricted to (and ordered by) the annotated samples, with the
## annotation joined to colData and the sequence of the first id of each
## feature joined to rowData.
annotate_se <- function(se, annot, fasta) {
    se <- se[, annot$Sample_IDs]
    cD <- as.data.frame(colData(se))
    rD <- as.data.frame(rowData(se))

    ## first id of each feature is used to look up the sequence
    rD$feature_cut <- vapply(strsplit(rD$feature, split = ";"), "[", 
        character(1), 1)

    cD_new <- left_join(cD, annot, by = c("name" = "Sample_IDs"))
    rD_new <- left_join(rD, fasta, by = c("feature_cut" = "feature"))

    ## a join against duplicated keys would add rows and misalign the
    ## annotation with the assay
    if (nrow(cD_new) != ncol(se))
        stop("joining the annotation changed the number of samples")
    if (nrow(rD_new) != nrow(se))
        stop("joining the sequences changed the number of features")

    ## the join drops the row names of colData, which carry the sample names
    ## of the object; restore them before writing colData back
    if (!is.null(colnames(se)))
        rownames(cD_new) <- colnames(se)

    ## use the setters (not direct slot assignment) so the dimensions are
    ## checked
    colData(se) <- DataFrame(cD_new)
    rowData(se) <- DataFrame(rD_new)
    se
}

## cells (human)
annot_cells$Sample_IDs <- make.names(annot_cells$Sample_IDs)
cells <- annotate_se(cells, annot = annot_cells, fasta = hs)

## fresh-frozen (mouse)
annot_ff$Sample_IDs <- make.names(annot_ff$Sample_IDs)
ff <- annotate_se(ff, annot = annot_ff, fasta = mm)

## FFPE (human)
annot_ffpe$Sample_IDs <- make.names(annot_ffpe$Sample_IDs)
ffpe <- annotate_se(ffpe, annot = annot_ffpe, fasta = hs)

## Plasma (human)
annot_plasma$Sample_IDs <- make.names(annot_plasma$Sample_IDs)
plasma <- annotate_se(plasma_serum, annot = annot_plasma, fasta = hs)

## Serum (human)
annot_serum$Sample_IDs <- make.names(annot_serum$Sample_IDs)
serum <- annotate_se(plasma_serum, annot = annot_serum, fasta = hs)

2 Differential expression analysis

For the differential expression analysis use in all cases the following scheme:

  • create the model matrix,
  • fit a linear model for each protein taking into account the model matrix using lmFit and method "ls",
  • create the contrasts between two levels as specified in each section,
  • compute contrasts from linear model fit; given the linear model fit to the data, compute estimated coefficients and standard errors for a given set of contrasts (using contrasts.fit)
  • run empirical Bayes statistics for differential expression; given a linear model fit from lmFit, compute moderated t-statistics, moderated F-statistic, and log-odds of differential expression by empirical Bayes moderation of the standard errors towards a global value (using eBayes)
## set parameters for differential expression (num only for display)
## NOTE(review): these are consumed by chunks that are not shown here;
## presumably num = number of reported features (Inf = all), p_val = p-value
## cutoff (1 = no filtering) and adj = multiple-testing adjustment method
## ("BH") -- confirm against the differential expression chunks.
num <- Inf
p_val <- 1
adj <- "BH"

3 Cells

contrasts:

  • Cells_AFA - Cells_TwoPhase_AFA

3.1 Cells: Overlap, uniqueness of features

The plot shows the interaction of sets between different variables depending on their presence. The dots in the UpSet plot specify if the criteria for presence are fulfilled: Presence is defined by a feature being measured in at least one sample of a set.

## png 
##   2

Only continue with the shared features in the following analyses.

3.2 Cells: CV

## [1] "mean: Cells_AFA"
## [1] 1.493834
## [1] "mean: Cells_TwoPhase_AFA"
## [1] 1.594873
## 
##  Wilcoxon signed rank test with continuity correction
## 
## data:  cv_a and cv_b
## V = 4226497, p-value = 0.003558
## alternative hypothesis: true location shift is not equal to 0

The TwoPhase method shows higher CV compared to the traditional AFA method.

3.3 Cells: Differential expression analysis

3.4 Cells: Enrichment analysis

Now run the GO enrichment tests. Use the raw (unadjusted) p-values and return the GO terms for the ontologies Biological Process (BP), Molecular Function (MF) and Cellular Component (CC). Perform a Fisher test with the significant features (\(\alpha < 0.05\)).

## [1] "## BP"
## [1] "## MF"
## [1] "## CC"

3.5 Cells: Association to physico-chemical parameters

Take the t-values and plot them against the GRAVY score and the isoelectric point.

## t-values of the contrast, taken from the "t" column of tT
t <- tT[, "t"]

## GRAVY score of every feature listed in tT
rD <- rowData(cells)
gravy <- unlist(lapply(rD[rownames(tT), "sequence"], calculateGravyScore))

## t-value against GRAVY score, with a linear trend line and the Spearman
## correlation
df <- data.frame(gravy = gravy, t = t)
ggplot(data = df, mapping = aes(x = gravy, y = t)) + 
    geom_point(alpha = 0.3) + 
    geom_smooth(method = lm, se = FALSE) +
    ggpubr::stat_cor(method = "spearman") +
    theme_classic()
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 6 rows containing non-finite values (stat_smooth).
## Warning: Removed 6 rows containing non-finite values (stat_cor).
## Warning: Removed 6 rows containing missing values (geom_point).

cor.test(gravy, t, method = "spearman")
## Warning in cor.test.default(gravy, t, method = "spearman"): Cannot compute exact
## p-value with ties
## 
##  Spearman's rank correlation rho
## 
## data:  gravy and t
## S = 1.5895e+10, p-value = 0.9366
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
##         rho 
## -0.00117734
## isoelectric point of every feature listed in tT
iep <- unlist(lapply(rD[rownames(tT), "sequence"], 
    calculateIsoelectricPoint, method = "IPC_protein"))

## t-value against isoelectric point, with a linear trend line and the
## Spearman correlation
df <- data.frame(iep = iep, t = t)
ggplot(data = df, mapping = aes(x = iep, y = t)) + 
    geom_point(alpha = 0.3) + 
    geom_smooth(method = lm, se = FALSE) +
    ggpubr::stat_cor(method = "spearman") +
    theme_classic()
## `geom_smooth()` using formula 'y ~ x'

cor.test(iep, t, method = "spearman")
## Warning in cor.test.default(iep, t, method = "spearman"): Cannot compute exact
## p-value with ties
## 
##  Spearman's rank correlation rho
## 
## data:  iep and t
## S = 1.629e+10, p-value = 0.1363
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
##         rho 
## -0.02203543

Create violin plots for shared/unique features.

## gravy
## GRAVY scores of the features that are unique to one extraction method; the
## feature ids are read from the first column of the respective text file
feat <- read.table(file = "unique_features_cells_Cells_AFA.txt", 
    header = TRUE)
gravy_other <- lapply(rowData(cells_all)[feat[, 1], "sequence"], 
    calculateGravyScore)
feat <- read.table(file = "unique_features_cells_Cells_TwoPhase_AFA.txt",
    header = TRUE)
gravy_tp <- lapply(rowData(cells_all)[feat[, 1], "sequence"], 
    calculateGravyScore)

## create df
## The score column is assembled as a numeric vector directly. Building it via
## rbind(cbind(...)) creates a character matrix, which sends the scores
## through text and fails when one group contains no features.
df <- local({
    groups <- list("shared" = unlist(gravy), "autoSP3" = unlist(gravy_other),
        "MTBE-SP3" = unlist(gravy_tp))
    data.frame(
        score = as.numeric(unlist(groups, use.names = FALSE)),
        type = factor(rep(names(groups), times = lengths(groups)),
            levels = c("autoSP3", "MTBE-SP3", "shared")))
})
comparisons <- list( c("autoSP3", "shared"), c("MTBE-SP3", "shared"), 
    c("autoSP3", "MTBE-SP3"))

plot_violin_barplot(df = df, x = "type", y = "score", fill = "type",
    comparisons = comparisons, file = "protein_violin_gravy_cells.pdf", 
    method = "wilcox.test", paired = FALSE)
## Warning: Removed 6 rows containing non-finite values (stat_ydensity).
## Warning: Removed 6 rows containing non-finite values (stat_boxplot).
## Warning: Removed 6 rows containing non-finite values (stat_signif).
## Warning: Removed 6 rows containing non-finite values (stat_ydensity).
## Warning: Removed 6 rows containing non-finite values (stat_boxplot).
## Warning: Removed 6 rows containing non-finite values (stat_signif).

## iep
## Isoelectric points of the features that are unique to one extraction
## method. The shared values (`iep`) were computed with
## method = "IPC_protein"; the same method is passed here explicitly so that
## the three compared groups are computed identically.
feat <- read.table(file = "unique_features_cells_Cells_AFA.txt", 
    header = TRUE)
iep_other <- lapply(rowData(cells_all)[feat[, 1], "sequence"], 
    function(aa) calculateIsoelectricPoint(aa, method = "IPC_protein"))
feat <- read.table(file = "unique_features_cells_Cells_TwoPhase_AFA.txt",
    header = TRUE)
iep_tp <- lapply(rowData(cells_all)[feat[, 1], "sequence"], 
    function(aa) calculateIsoelectricPoint(aa, method = "IPC_protein"))

## create df
## The score column is assembled as a numeric vector directly. Building it via
## rbind(cbind(...)) creates a character matrix, which sends the scores
## through text and fails when one group contains no features.
df <- local({
    groups <- list("shared" = unlist(iep), "autoSP3" = unlist(iep_other),
        "MTBE-SP3" = unlist(iep_tp))
    data.frame(
        score = as.numeric(unlist(groups, use.names = FALSE)),
        type = factor(rep(names(groups), times = lengths(groups)),
            levels = c("autoSP3", "MTBE-SP3", "shared")))
})
comparisons <- list( c("autoSP3", "shared"), c("MTBE-SP3", "shared"), 
    c("autoSP3", "MTBE-SP3"))

plot_violin_barplot(df = df, x = "type", y = "score", fill = "type",
    comparisons = comparisons, file = "protein_violin_iep_cells.pdf", 
    method = "wilcox.test", paired = FALSE)

4 Fresh-frozen

contrasts:

  • powder_AFA - powder_TwoPhase_AFA (contrast 1)
  • Tissue_bulk_AFA - powder_TwoPhase_AFA (contrast 2)

Upset plot for all condition types.

## png 
##   2

4.1 Fresh-frozen: Overlap, uniqueness of features (contrast 1)

The plot shows the interaction of sets between different variables depending on their presence. The dots in the UpSet plot specify if the criteria for presence are fulfilled: Presence is defined by a feature being measured in at least one sample of a set.

Only continue with the shared features in the following analyses.

4.2 Fresh-frozen: CV (contrast 1)

## [1] "mean: powder_AFA"
## [1] 1.67778
## [1] "mean: powder_TwoPhase_AFA"
## [1] 2.082931
## 
##  Wilcoxon signed rank test with continuity correction
## 
## data:  cv_a and cv_b
## V = 2409681, p-value < 2.2e-16
## alternative hypothesis: true location shift is not equal to 0

4.3 Fresh-frozen: Differential expression analysis (contrast 1)

## Warning: Partial NA coefficients for 472 probe(s)
## [1] "## contrast: powder_AFA - powder_TwoPhase_AFA"

4.4 Fresh-frozen: Enrichment analysis (contrast 1)

Now run the GO enrichment tests. Use the raw (unadjusted) p-values and return the GO terms for the ontologies Biological Process (BP), Molecular Function (MF) and Cellular Component (CC). Perform a Fisher test with the significant features (\(\alpha < 0.05\)).

## [1] "## BP"
## [1] "## MF"
## [1] "## CC"

4.5 Fresh-frozen: Association to physico-chemical parameters (contrast 1)

Take the t-values and plot them against the GRAVY score and the isoelectric point.

## t-values of the contrast, taken from the "t" column of tT
t <- tT[, "t"]

## GRAVY score of every feature listed in tT
rD <- rowData(ff)
gravy <- unlist(lapply(rD[rownames(tT), "sequence"], calculateGravyScore))

## t-value against GRAVY score, with a linear trend line and the Spearman
## correlation
df <- data.frame(gravy = gravy, t = t)
ggplot(data = df, mapping = aes(x = gravy, y = t)) + 
    geom_point(alpha = 0.3) + 
    geom_smooth(method = lm, se = FALSE) +
    ggpubr::stat_cor(method = "spearman") +
    theme_classic()
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 12 rows containing non-finite values (stat_smooth).
## Warning: Removed 12 rows containing non-finite values (stat_cor).
## Warning: Removed 12 rows containing missing values (geom_point).

cor.test(gravy, t, method = "spearman")
## Warning in cor.test.default(gravy, t, method = "spearman"): Cannot compute exact
## p-value with ties
## 
##  Spearman's rank correlation rho
## 
## data:  gravy and t
## S = 7789494545, p-value < 2.2e-16
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
##       rho 
## 0.3138748
## isoelectric point of every feature listed in tT
iep <- unlist(lapply(rD[rownames(tT), "sequence"], 
    calculateIsoelectricPoint, method = "IPC_protein"))

## t-value against isoelectric point, with a linear trend line and the
## Spearman correlation
df <- data.frame(iep = iep, t = t)
ggplot(data = df, mapping = aes(x = iep, y = t)) + 
    geom_point(alpha = 0.3) + 
    geom_smooth(method = lm, se = FALSE) +
    ggpubr::stat_cor(method = "spearman") +
    theme_classic()
## `geom_smooth()` using formula 'y ~ x'

cor.test(iep, t, method = "spearman")
## Warning in cor.test.default(iep, t, method = "spearman"): Cannot compute exact
## p-value with ties
## 
##  Spearman's rank correlation rho
## 
## data:  iep and t
## S = 1.0298e+10, p-value = 9.888e-11
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
##       rho 
## 0.1008457

Create violin plots for shared/unique features.

## gravy
## GRAVY scores of the features that are unique to one extraction method; the
## feature ids are read from the first column of the respective text file
feat <- read.table(file = "unique_features_ff_contrast1_powder.txt", 
    header = TRUE)
gravy_other <- lapply(rowData(ff_1_all)[feat[, 1], "sequence"], 
    calculateGravyScore)
feat <- read.table(file = "unique_features_ff_contrast1_powder_TwoPhase.txt",
    header = TRUE)
gravy_tp <- lapply(rowData(ff_1_all)[feat[, 1], "sequence"], 
    calculateGravyScore)

## create df
## The score column is assembled as a numeric vector directly. Building it via
## rbind(cbind(...)) creates a character matrix, which sends the scores
## through text and fails when one group contains no features.
df <- local({
    groups <- list("shared" = unlist(gravy), "autoSP3" = unlist(gravy_other),
        "MTBE-SP3" = unlist(gravy_tp))
    data.frame(
        score = as.numeric(unlist(groups, use.names = FALSE)),
        type = factor(rep(names(groups), times = lengths(groups)),
            levels = c("autoSP3", "MTBE-SP3", "shared")))
})
comparisons <- list( c("autoSP3", "shared"), c("MTBE-SP3", "shared"), 
    c("autoSP3", "MTBE-SP3"))

plot_violin_barplot(df = df, x = "type", y = "score", fill = "type",
    comparisons = comparisons, file = "protein_violin_gravy_freshfrozen_1.pdf", 
    method = "wilcox.test", paired = FALSE)
## Warning: Removed 14 rows containing non-finite values (stat_ydensity).
## Warning: Removed 14 rows containing non-finite values (stat_boxplot).
## Warning: Removed 14 rows containing non-finite values (stat_signif).
## Warning: Removed 14 rows containing non-finite values (stat_ydensity).
## Warning: Removed 14 rows containing non-finite values (stat_boxplot).
## Warning: Removed 14 rows containing non-finite values (stat_signif).

## iep
## Isoelectric points of the features that are unique to one extraction
## method. The shared values (`iep`) were computed with
## method = "IPC_protein"; the same method is passed here explicitly so that
## the three compared groups are computed identically.
feat <- read.table(file = "unique_features_ff_contrast1_powder.txt", 
    header = TRUE)
iep_other <- lapply(rowData(ff_1_all)[feat[, 1], "sequence"], 
    function(aa) calculateIsoelectricPoint(aa, method = "IPC_protein"))
feat <- read.table(file = "unique_features_ff_contrast1_powder_TwoPhase.txt",
    header = TRUE)
iep_tp <- lapply(rowData(ff_1_all)[feat[, 1], "sequence"], 
    function(aa) calculateIsoelectricPoint(aa, method = "IPC_protein"))

## create df
## The score column is assembled as a numeric vector directly. Building it via
## rbind(cbind(...)) creates a character matrix, which sends the scores
## through text and fails when one group contains no features.
df <- local({
    groups <- list("shared" = unlist(iep), "autoSP3" = unlist(iep_other),
        "MTBE-SP3" = unlist(iep_tp))
    data.frame(
        score = as.numeric(unlist(groups, use.names = FALSE)),
        type = factor(rep(names(groups), times = lengths(groups)),
            levels = c("autoSP3", "MTBE-SP3", "shared")))
})
comparisons <- list( c("autoSP3", "shared"), c("MTBE-SP3", "shared"), 
    c("autoSP3", "MTBE-SP3"))

plot_violin_barplot(df = df, x = "type", y = "score", fill = "type",
    comparisons = comparisons, file = "protein_violin_iep_freshfrozen_1.pdf", 
    method = "wilcox.test", paired = FALSE)

4.6 Fresh-frozen: Overlap, uniqueness of features (contrast 2)

The plot shows the interaction of sets between different variables depending on their presence. The dots in the UpSet plot specify if the criteria for presence are fulfilled: Presence is defined by a feature being measured in at least one sample of a set.

Only continue with the shared features in the following analyses.

4.7 Fresh-frozen: CV (contrast 2)

## [1] "mean: Tissue_bulk_AFA"
## [1] 2.394138
## [1] "mean: powder_TwoPhase_AFA"
## [1] 2.05866
## 
##  Wilcoxon signed rank test with continuity correction
## 
## data:  cv_a and cv_b
## V = 3740512, p-value = 4.703e-11
## alternative hypothesis: true location shift is not equal to 0

4.8 Fresh-frozen: Differential expression analysis (contrast 2)

## [1] "## contrast: Tissue_bulk_AFA - powder_TwoPhase_AFA"

4.9 Fresh-frozen: Enrichment analysis (contrast 2)

Now run the GO enrichment tests. Use the raw (unadjusted) p-values and return the GO terms for the ontologies Biological Process (BP), Molecular Function (MF) and Cellular Component (CC). Perform a Fisher test with the significant features (\(\alpha < 0.05\)).

## [1] "## BP"
## [1] "## MF"
## [1] "## CC"

4.10 Fresh-frozen: Association to physico-chemical parameters (contrast 2)

Take the t-values and plot them against the GRAVY score and the isoelectric point.

## t-values of the contrast, taken from the "t" column of tT
t <- tT[, "t"]

## GRAVY score of every feature listed in tT
rD <- rowData(ff)
gravy <- unlist(lapply(rD[rownames(tT), "sequence"], calculateGravyScore))

## t-value against GRAVY score, with a linear trend line and the Spearman
## correlation
df <- data.frame(gravy = gravy, t = t)
ggplot(data = df, mapping = aes(x = gravy, y = t)) + 
    geom_point(alpha = 0.3) + 
    geom_smooth(method = lm, se = FALSE) +
    ggpubr::stat_cor(method = "spearman") +
    theme_classic()
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 10 rows containing non-finite values (stat_smooth).
## Warning: Removed 10 rows containing non-finite values (stat_cor).
## Warning: Removed 10 rows containing missing values (geom_point).

cor.test(gravy, t, method = "spearman")
## Warning in cor.test.default(gravy, t, method = "spearman"): Cannot compute exact
## p-value with ties
## 
##  Spearman's rank correlation rho
## 
## data:  gravy and t
## S = 7986424933, p-value < 2.2e-16
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
##       rho 
## 0.2711298
## isoelectric point of every feature listed in tT
iep <- unlist(lapply(rD[rownames(tT), "sequence"], 
    calculateIsoelectricPoint, method = "IPC_protein"))

## t-value against isoelectric point, with a linear trend line and the
## Spearman correlation
df <- data.frame(iep = iep, t = t)
ggplot(data = df, mapping = aes(x = iep, y = t)) + 
    geom_point(alpha = 0.3) + 
    geom_smooth(method = lm, se = FALSE) +
    ggpubr::stat_cor(method = "spearman") +
    theme_classic()
## `geom_smooth()` using formula 'y ~ x'

cor.test(iep, t, method = "spearman")
## Warning in cor.test.default(iep, t, method = "spearman"): Cannot compute exact
## p-value with ties
## 
##  Spearman's rank correlation rho
## 
## data:  iep and t
## S = 9982874054, p-value = 1.079e-09
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
##        rho 
## 0.09566522

Create violin plots for shared/unique features.

## gravy
## GRAVY scores of the features that are unique to one extraction method; the
## feature ids are read from the first column of the respective text file
feat <- read.table(file = "unique_features_ff_contrast2_Tissue_bulk.txt", 
    header = TRUE)
gravy_other <- lapply(rowData(ff_2_all)[feat[, 1], "sequence"], 
    calculateGravyScore)
feat <- read.table(file = "unique_features_ff_contrast2_powder_TwoPhase.txt",
    header = TRUE)
gravy_tp <- lapply(rowData(ff_2_all)[feat[, 1], "sequence"], 
    calculateGravyScore)

## create df
## The score column is assembled as a numeric vector directly. Building it via
## rbind(cbind(...)) creates a character matrix, which sends the scores
## through text and fails when one group contains no features.
df <- local({
    groups <- list("shared" = unlist(gravy), "autoSP3" = unlist(gravy_other),
        "MTBE-SP3" = unlist(gravy_tp))
    data.frame(
        score = as.numeric(unlist(groups, use.names = FALSE)),
        type = factor(rep(names(groups), times = lengths(groups)),
            levels = c("autoSP3", "MTBE-SP3", "shared")))
})
comparisons <- list( c("autoSP3", "shared"), c("MTBE-SP3", "shared"), 
    c("autoSP3", "MTBE-SP3"))

plot_violin_barplot(df = df, x = "type", y = "score", fill = "type",
    comparisons = comparisons, file = "protein_violin_gravy_freshfrozen_2.pdf", 
    method = "wilcox.test", paired = FALSE)
## Warning: Removed 14 rows containing non-finite values (stat_ydensity).
## Warning: Removed 14 rows containing non-finite values (stat_boxplot).
## Warning: Removed 14 rows containing non-finite values (stat_signif).
## Warning: Removed 14 rows containing non-finite values (stat_ydensity).
## Warning: Removed 14 rows containing non-finite values (stat_boxplot).
## Warning: Removed 14 rows containing non-finite values (stat_signif).

## iep
## Isoelectric points of the features that are unique to one extraction
## method. The shared values (`iep`) were computed with
## method = "IPC_protein"; the same method is passed here explicitly so that
## the three compared groups are computed identically.
feat <- read.table(file = "unique_features_ff_contrast2_Tissue_bulk.txt", 
    header = TRUE)
iep_other <- lapply(rowData(ff_2_all)[feat[, 1], "sequence"], 
    function(aa) calculateIsoelectricPoint(aa, method = "IPC_protein"))
feat <- read.table(file = "unique_features_ff_contrast2_powder_TwoPhase.txt",
    header = TRUE)
iep_tp <- lapply(rowData(ff_2_all)[feat[, 1], "sequence"], 
    function(aa) calculateIsoelectricPoint(aa, method = "IPC_protein"))

## create df
## The score column is assembled as a numeric vector directly. Building it via
## rbind(cbind(...)) creates a character matrix, which sends the scores
## through text and fails when one group contains no features.
df <- local({
    groups <- list("shared" = unlist(iep), "autoSP3" = unlist(iep_other),
        "MTBE-SP3" = unlist(iep_tp))
    data.frame(
        score = as.numeric(unlist(groups, use.names = FALSE)),
        type = factor(rep(names(groups), times = lengths(groups)),
            levels = c("autoSP3", "MTBE-SP3", "shared")))
})
comparisons <- list( c("autoSP3", "shared"), c("MTBE-SP3", "shared"), 
    c("autoSP3", "MTBE-SP3"))

plot_violin_barplot(df = df, x = "type", y = "score", fill = "type",
    comparisons = comparisons, file = "protein_violin_iep_freshfrozen_2.pdf", 
    method = "wilcox.test", paired = FALSE)

5 FFPE

contrasts:

  • powder_AFA - powder_TwoPhase_AFA (contrast 1)
  • bulk_FFPE_AFA - powder_TwoPhase_AFA (contrast 2)

Upset plot for all condition types.

## png 
##   2

5.1 FFPE: Overlap, uniqueness of features (contrast 1)

The plot shows the interaction of sets between different variables depending on their presence. The dots in the UpSet plot specify if the criteria for presence are fulfilled: Presence is defined by a feature being measured in at least one sample of a set.

Only continue with the shared features in the following analyses.

5.2 FFPE: CV (contrast 1)

## [1] "mean: powder_AFA"
## [1] 3.338009
## [1] "mean: powder_TwoPhase_AFA"
## [1] 2.251171
## 
##  Wilcoxon signed rank test with continuity correction
## 
## data:  cv_a and cv_b
## V = 3069548, p-value < 2.2e-16
## alternative hypothesis: true location shift is not equal to 0

5.3 FFPE: Differential expression analysis (contrast 1)

## Warning: Partial NA coefficients for 561 probe(s)
## [1] "## contrast: powder_AFA - powder_TwoPhase_AFA"

5.4 FFPE: Enrichment analysis (contrast 1)

Now run the GO enrichment tests. Use the raw (unadjusted) p-values and return the GO terms for the ontologies Biological Process (BP), Molecular Function (MF) and Cellular Component (CC). Perform a Fisher test with the significant features (\(\alpha < 0.05\)).

## [1] "## BP"
## [1] "## MF"
## [1] "## CC"

5.5 FFPE: Association to physico-chemical parameters (contrast 1)

Take the t-values and plot them against the GRAVY score and the isoelectric point.

## t-values of the contrast, taken from the "t" column of tT
t <- tT[, "t"]

## GRAVY score of every feature listed in tT
rD <- rowData(ffpe)
gravy <- unlist(lapply(rD[rownames(tT), "sequence"], calculateGravyScore))

## t-value against GRAVY score, with a linear trend line and the Spearman
## correlation
df <- data.frame(gravy = gravy, t = t)
ggplot(data = df, mapping = aes(x = gravy, y = t)) + 
    geom_point(alpha = 0.3) + 
    geom_smooth(method = lm, se = FALSE) +
    ggpubr::stat_cor(method = "spearman") +
    theme_classic()
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 6 rows containing non-finite values (stat_smooth).
## Warning: Removed 6 rows containing non-finite values (stat_cor).
## Warning: Removed 6 rows containing missing values (geom_point).

cor.test(gravy, t, method = "spearman")
## Warning in cor.test.default(gravy, t, method = "spearman"): Cannot compute exact
## p-value with ties
## 
##  Spearman's rank correlation rho
## 
## data:  gravy and t
## S = 5925479107, p-value = 0.02808
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
##        rho 
## 0.03805362
## isoelectric point of every feature listed in tT
iep <- unlist(lapply(rD[rownames(tT), "sequence"], 
    calculateIsoelectricPoint, method = "IPC_protein"))

## t-value against isoelectric point, with a linear trend line and the
## Spearman correlation
df <- data.frame(iep = iep, t = t)
ggplot(data = df, mapping = aes(x = iep, y = t)) + 
    geom_point(alpha = 0.3) + 
    geom_smooth(method = lm, se = FALSE) +
    ggpubr::stat_cor(method = "spearman") +
    theme_classic()
## `geom_smooth()` using formula 'y ~ x'

cor.test(iep, t, method = "spearman")
## Warning in cor.test.default(iep, t, method = "spearman"): Cannot compute exact
## p-value with ties
## 
##  Spearman's rank correlation rho
## 
## data:  iep and t
## S = 6279457713, p-value = 0.4214
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
##         rho 
## -0.01392261

Create violin plots for shared/unique features.

## gravy
## GRAVY scores of the features that are unique to one extraction method; the
## feature ids are read from the first column of the respective text file
feat <- read.table(file = "unique_features_ffpe_contrast1_powder.txt", 
    header = TRUE)
gravy_other <- lapply(rowData(ffpe_1_all)[feat[, 1], "sequence"], 
    calculateGravyScore)
feat <- read.table(file = "unique_features_ffpe_contrast1_TwoPhase.txt",
    header = TRUE)
gravy_tp <- lapply(rowData(ffpe_1_all)[feat[, 1], "sequence"], 
    calculateGravyScore)

## create df
## The score column is assembled as a numeric vector directly. Building it via
## rbind(cbind(...)) creates a character matrix, which sends the scores
## through text and fails when one group contains no features.
df <- local({
    groups <- list("shared" = unlist(gravy), "autoSP3" = unlist(gravy_other),
        "MTBE-SP3" = unlist(gravy_tp))
    data.frame(
        score = as.numeric(unlist(groups, use.names = FALSE)),
        type = factor(rep(names(groups), times = lengths(groups)),
            levels = c("autoSP3", "MTBE-SP3", "shared")))
})
comparisons <- list( c("autoSP3", "shared"), c("MTBE-SP3", "shared"), 
    c("autoSP3", "MTBE-SP3"))

plot_violin_barplot(df = df, x = "type", y = "score", fill = "type",
    comparisons = comparisons, file = "protein_violin_gravy_FFPE_1.pdf", 
    method = "wilcox.test", paired = FALSE)
## Warning: Removed 7 rows containing non-finite values (stat_ydensity).
## Warning: Removed 7 rows containing non-finite values (stat_boxplot).
## Warning: Removed 7 rows containing non-finite values (stat_signif).
## Warning: Removed 7 rows containing non-finite values (stat_ydensity).
## Warning: Removed 7 rows containing non-finite values (stat_boxplot).
## Warning: Removed 7 rows containing non-finite values (stat_signif).

## iep
## Isoelectric points of the features that are unique to one extraction
## method. The shared values (`iep`) were computed with
## method = "IPC_protein"; the same method is passed here explicitly so that
## the three compared groups are computed identically.
feat <- read.table(file = "unique_features_ffpe_contrast1_powder.txt", 
    header = TRUE)
iep_other <- lapply(rowData(ffpe_1_all)[feat[, 1], "sequence"], 
    function(aa) calculateIsoelectricPoint(aa, method = "IPC_protein"))
feat <- read.table(file = "unique_features_ffpe_contrast1_TwoPhase.txt",
    header = TRUE)
iep_tp <- lapply(rowData(ffpe_1_all)[feat[, 1], "sequence"], 
    function(aa) calculateIsoelectricPoint(aa, method = "IPC_protein"))

## create df
## The score column is assembled as a numeric vector directly. Building it via
## rbind(cbind(...)) creates a character matrix, which sends the scores
## through text and fails when one group contains no features.
df <- local({
    groups <- list("shared" = unlist(iep), "autoSP3" = unlist(iep_other),
        "MTBE-SP3" = unlist(iep_tp))
    data.frame(
        score = as.numeric(unlist(groups, use.names = FALSE)),
        type = factor(rep(names(groups), times = lengths(groups)),
            levels = c("autoSP3", "MTBE-SP3", "shared")))
})
comparisons <- list( c("autoSP3", "shared"), c("MTBE-SP3", "shared"), 
    c("autoSP3", "MTBE-SP3"))

plot_violin_barplot(df = df, x = "type", y = "score", fill = "type",
    comparisons = comparisons, file = "protein_violin_iep_FFPE_1.pdf", 
    method = "wilcox.test", paired = FALSE)

5.6 FFPE: Overlap, uniqueness of features (contrast 2)

The plot shows the interaction of sets between different variables depending on their presence. The dots in the UpSet plot specify if the criteria for presence are fulfilled: Presence is defined by a feature being measured in at least one sample of a set.

Only continue with the shared features in the following analyses.

5.7 FFPE: CV (contrast 2)

## [1] "mean: FFPE_AFA"
## [1] 3.115173
## [1] "mean: powder_TwoPhase_AFA"
## [1] 2.295979
## 
##  Wilcoxon signed rank test with continuity correction
## 
## data:  cv_a and cv_b
## V = 3486655, p-value < 2.2e-16
## alternative hypothesis: true location shift is not equal to 0

5.8 FFPE: Differential expression analysis (contrast 2)

## [1] "## contrast: FFPE_AFA - powder_TwoPhase_AFA"

5.9 FFPE: Enrichment analysis (contrast 2)

Now run the GO enrichment tests. Use the raw (unadjusted) p-values and return the GO terms for the ontologies Biological Process (BP), Molecular Function (MF) and Cellular Component (CC). Perform a Fisher test with the significant features (\(\alpha < 0.05\)).

## [1] "## BP"
## [1] "## MF"
## [1] "## CC"

5.10 FFPE: Association to physico-chemical parameters (contrast 2)

Take the t-values and plot them against the GRAVY score and the isoelectric point.

## t-values of the contrast, taken from the "t" column of tT
t <- tT[, "t"]

## GRAVY score of every feature listed in tT
rD <- rowData(ffpe)
gravy <- unlist(lapply(rD[rownames(tT), "sequence"], calculateGravyScore))

## t-value against GRAVY score, with a linear trend line and the Spearman
## correlation
df <- data.frame(gravy = gravy, t = t)
ggplot(data = df, mapping = aes(x = gravy, y = t)) + 
    geom_point(alpha = 0.3) + 
    geom_smooth(method = lm, se = FALSE) +
    ggpubr::stat_cor(method = "spearman") +
    theme_classic()
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 7 rows containing non-finite values (stat_smooth).
## Warning: Removed 7 rows containing non-finite values (stat_cor).
## Warning: Removed 7 rows containing missing values (geom_point).

cor.test(gravy, t, method = "spearman")
## Warning in cor.test.default(gravy, t, method = "spearman"): Cannot compute exact
## p-value with ties
## 
##  Spearman's rank correlation rho
## 
## data:  gravy and t
## S = 5145845681, p-value < 2.2e-16
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
##       rho 
## 0.2648622
## isoelectric point of every feature listed in tT
iep <- unlist(lapply(rD[rownames(tT), "sequence"], 
    calculateIsoelectricPoint, method = "IPC_protein"))

## t-value against isoelectric point, with a linear trend line and the
## Spearman correlation
df <- data.frame(iep = iep, t = t)
ggplot(data = df, mapping = aes(x = iep, y = t)) + 
    geom_point(alpha = 0.3) + 
    geom_smooth(method = lm, se = FALSE) +
    ggpubr::stat_cor(method = "spearman") +
    theme_classic()
## `geom_smooth()` using formula 'y ~ x'

cor.test(iep, t, method = "spearman")
## Warning in cor.test.default(iep, t, method = "spearman"): Cannot compute exact
## p-value with ties
## 
##  Spearman's rank correlation rho
## 
## data:  iep and t
## S = 7100055161, p-value = 0.628
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
##          rho 
## -0.008213679

Create violin plots for shared/unique features.

## gravy
## GRAVY scores of the features that are unique to one extraction method; the
## feature ids are read from the first column of the respective text file
feat <- read.table(file = "unique_features_ffpe_contrast2_FFPE.txt", 
    header = TRUE)
gravy_other <- lapply(rowData(ffpe_2_all)[feat[, 1], "sequence"], 
    calculateGravyScore)
feat <- read.table(file = "unique_features_ffpe_contrast2_powder_TwoPhase.txt",
    header = TRUE)
gravy_tp <- lapply(rowData(ffpe_2_all)[feat[, 1], "sequence"], 
    calculateGravyScore)

## create df
## The score column is assembled as a numeric vector directly. Building it via
## rbind(cbind(...)) creates a character matrix, which sends the scores
## through text and fails when one group contains no features.
df <- local({
    groups <- list("shared" = unlist(gravy), "autoSP3" = unlist(gravy_other),
        "MTBE-SP3" = unlist(gravy_tp))
    data.frame(
        score = as.numeric(unlist(groups, use.names = FALSE)),
        type = factor(rep(names(groups), times = lengths(groups)),
            levels = c("autoSP3", "MTBE-SP3", "shared")))
})
comparisons <- list( c("autoSP3", "shared"), c("MTBE-SP3", "shared"), 
    c("autoSP3", "MTBE-SP3"))

plot_violin_barplot(df = df, x = "type", y = "score", fill = "type",
    comparisons = comparisons, file = "protein_violin_gravy_FFPE_2.pdf", 
    method = "wilcox.test", paired = FALSE)
## Warning: Removed 7 rows containing non-finite values (stat_ydensity).
## Warning: Removed 7 rows containing non-finite values (stat_boxplot).
## Warning: Removed 7 rows containing non-finite values (stat_signif).
## Warning: Removed 7 rows containing non-finite values (stat_ydensity).
## Warning: Removed 7 rows containing non-finite values (stat_boxplot).
## Warning: Removed 7 rows containing non-finite values (stat_signif).

## iep
feat <- read.table(file = "unique_features_ffpe_contrast2_FFPE.txt", 
    header = TRUE)
iep_other <- lapply(rowData(ffpe_2_all)[feat[, 1], "sequence"], 
    function(aa) calculateIsoelectricPoint(aa))
feat <- read.table(file = "unique_features_ffpe_contrast2_powder_TwoPhase.txt",
    header = TRUE)
iep_tp <- lapply(rowData(ffpe_2_all)[feat[, 1], "sequence"], 
    function(aa) calculateIsoelectricPoint(aa))

## create df
df <- data.frame(rbind(
    cbind(unlist(iep), "shared"), 
    cbind(unlist(iep_other), "autoSP3"),
    cbind(unlist(iep_tp), "MTBE-SP3")))
colnames(df) <- c("score", "type") 
df$score <- as.numeric(df$score)
df$type <- factor(df$type, levels = c("autoSP3", "MTBE-SP3", "shared")) 
comparisons <- list( c("autoSP3", "shared"), c("MTBE-SP3", "shared"), 
    c("autoSP3", "MTBE-SP3"))

plot_violin_barplot(df = df, x = "type", y = "score", fill = "type",
    comparisons = comparisons, file = "protein_violin_iep_FFPE_2.pdf", 
    method = "wilcox.test", paired = FALSE)

6 Plasma

contrasts:

  • Plasma_AFA - Plasma_TwoPhase_AFA

Upset plot for all condition types.

## png 
##   2

6.1 Plasma: Overlap, uniqueness of features

The plot shows the interaction of sets between different variables depending on their presence. The dots in the UpSet plot specify if the criteria for presence are fulfilled: Presence is defined by a feature being measured in at least one sample of a set.

Only continue with the shared features in the following analyses.

6.2 Plasma: CV

## [1] "mean: Plasma_AFA"
## [1] 2.324831
## [1] "mean: Plasma_TwoPhase_AFA"
## [1] 1.637527
## 
##  Wilcoxon signed rank test with continuity correction
## 
## data:  cv_a and cv_b
## V = 19615, p-value = 6.259e-09
## alternative hypothesis: true location shift is not equal to 0

6.3 Plasma: Differential expression analysis

## [1] "## contrast: plasma_AFA - plasma_TwoPhase_AFA"

6.4 Plasma: Enrichment analysis

Now run the GO enrichment tests. Use the raw (unadjusted) p-values and return the GO terms for the ontologies Biological Process (BP), Molecular Function (MF), and Cellular Component (CC). Perform a Fisher test on the significant features (\(\alpha < 0.05\)).

## [1] "## BP"
## [1] "## MF"
## [1] "## CC"

6.5 Plasma: Association to physico-chemical parameters

Plot the t-values against the GRAVY score and the isoelectric point.

## t-values of the current contrast (column "t" of tT)
t <- tT[, "t"]

## GRAVY score
## one value per row name of tT, computed from the "sequence" column of the
## plasma row data
rD <- rowData(plasma)
gravy <- unlist(lapply(rD[rownames(tT), "sequence"], calculateGravyScore))

## scatter plot of t against the GRAVY score with a linear fit and the
## Spearman correlation printed by ggpubr::stat_cor
df <- data.frame(gravy = gravy, t = t)
ggplot(data = df, mapping = aes(x = gravy, y = t)) +
    geom_point(alpha = 0.3) +
    geom_smooth(method = "lm", se = FALSE) +
    ggpubr::stat_cor(method = "spearman") +
    theme_classic()
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 3 rows containing non-finite values (stat_smooth).
## Warning: Removed 3 rows containing non-finite values (stat_cor).
## Warning: Removed 3 rows containing missing values (geom_point).

cor.test(gravy, t, method = "spearman")
## Warning in cor.test.default(gravy, t, method = "spearman"): Cannot compute exact
## p-value with ties
## 
##  Spearman's rank correlation rho
## 
## data:  gravy and t
## S = 2674532, p-value = 0.7424
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
##        rho 
## 0.02072509
## isoelectric point
## one value per row name of tT, computed from the "sequence" column of rD
## with method = "IPC_protein"
iep <- unlist(lapply(rD[rownames(tT), "sequence"],
    calculateIsoelectricPoint, method = "IPC_protein"))

## scatter plot of t against the isoelectric point with a linear fit and the
## Spearman correlation printed by ggpubr::stat_cor
df <- data.frame(iep = iep, t = t)
ggplot(data = df, mapping = aes(x = iep, y = t)) +
    geom_point(alpha = 0.3) +
    geom_smooth(method = "lm", se = FALSE) +
    ggpubr::stat_cor(method = "spearman") +
    theme_classic()
## `geom_smooth()` using formula 'y ~ x'

cor.test(iep, t, method = "spearman")
## Warning in cor.test.default(iep, t, method = "spearman"): Cannot compute exact
## p-value with ties
## 
##  Spearman's rank correlation rho
## 
## data:  iep and t
## S = 2523773, p-value = 0.08425
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
##     rho 
## 0.10791

Create violin plots for shared/unique features.

## gravy
## GRAVY scores of the features listed in the two unique-feature files (the
## first column of each file is used to index rowData(plasma_all))
feat <- read.table(file = "unique_features_plasma_Plasma_AFA.txt",
    header = TRUE)
gravy_other <- lapply(rowData(plasma_all)[feat[, 1], "sequence"],
    function(aa) calculateGravyScore(aa))
feat <- read.table(file = "unique_features_plasma_Plasma_TwoPhase.txt",
    header = TRUE)
gravy_tp <- lapply(rowData(plasma_all)[feat[, 1], "sequence"],
    function(aa) calculateGravyScore(aa))

## create df
## keep the scores numeric from the start: binding them to the labels with
## cbind()/rbind() would turn them into strings that have to be parsed back,
## and would fail when one of the groups contains no features
scores <- list(shared = unlist(gravy), autoSP3 = unlist(gravy_other),
    "MTBE-SP3" = unlist(gravy_tp))
df <- data.frame(
    score = as.numeric(unlist(scores, use.names = FALSE)),
    type = factor(rep(names(scores), times = lengths(scores)),
        levels = c("autoSP3", "MTBE-SP3", "shared")))

## group pairs handed to plot_violin_barplot() via `comparisons`
comparisons <- list(c("autoSP3", "shared"), c("MTBE-SP3", "shared"),
    c("autoSP3", "MTBE-SP3"))

plot_violin_barplot(df = df, x = "type", y = "score", fill = "type",
    comparisons = comparisons, file = "protein_violin_gravy_plasma.pdf",
    method = "wilcox.test", paired = FALSE)
## Warning: Removed 3 rows containing non-finite values (stat_ydensity).
## Warning: Removed 3 rows containing non-finite values (stat_boxplot).
## Warning: Removed 3 rows containing non-finite values (stat_signif).
## Warning: Removed 3 rows containing non-finite values (stat_ydensity).
## Warning: Removed 3 rows containing non-finite values (stat_boxplot).
## Warning: Removed 3 rows containing non-finite values (stat_signif).

## iep
## isoelectric points of the features listed in the two unique-feature files
## (the first column of each file is used to index rowData(plasma_all));
## method = "IPC_protein" is passed explicitly because the shared values in
## `iep` were computed with it, so that all three groups use the same method
feat <- read.table(file = "unique_features_plasma_Plasma_AFA.txt",
    header = TRUE)
iep_other <- lapply(rowData(plasma_all)[feat[, 1], "sequence"],
    function(aa) calculateIsoelectricPoint(aa, method = "IPC_protein"))
feat <- read.table(file = "unique_features_plasma_Plasma_TwoPhase.txt",
    header = TRUE)
iep_tp <- lapply(rowData(plasma_all)[feat[, 1], "sequence"],
    function(aa) calculateIsoelectricPoint(aa, method = "IPC_protein"))

## create df
## keep the scores numeric from the start: binding them to the labels with
## cbind()/rbind() would turn them into strings that have to be parsed back,
## and would fail when one of the groups contains no features
scores <- list(shared = unlist(iep), autoSP3 = unlist(iep_other),
    "MTBE-SP3" = unlist(iep_tp))
df <- data.frame(
    score = as.numeric(unlist(scores, use.names = FALSE)),
    type = factor(rep(names(scores), times = lengths(scores)),
        levels = c("autoSP3", "MTBE-SP3", "shared")))

## group pairs handed to plot_violin_barplot() via `comparisons`
comparisons <- list(c("autoSP3", "shared"), c("MTBE-SP3", "shared"),
    c("autoSP3", "MTBE-SP3"))

plot_violin_barplot(df = df, x = "type", y = "score", fill = "type",
    comparisons = comparisons, file = "protein_violin_iep_plasma.pdf",
    method = "wilcox.test", paired = FALSE)

7 Serum

contrasts:

  • Serum_AFA - Serum_TwoPhase_AFA

Upset plot for all condition types.

## png 
##   2

7.1 Serum: Overlap, uniqueness of features

The plot shows the interaction of sets between different variables depending on their presence. The dots in the UpSet plot specify if the criteria for presence are fulfilled: Presence is defined by a feature being measured in at least one sample of a set.

Only continue with the shared features in the following analyses.

7.2 Serum: CV

## [1] "mean: Serum_AFA"
## [1] 1.945501
## [1] "mean: Serum_TwoPhase_AFA"
## [1] 1.94307
## 
##  Wilcoxon signed rank test with continuity correction
## 
## data:  cv_a and cv_b
## V = 12006, p-value = 0.3543
## alternative hypothesis: true location shift is not equal to 0

7.3 Serum: Differential expression analysis

## [1] "## contrast: Serum_AFA - Serum_TwoPhase_AFA"

7.4 Serum: Enrichment analysis

Now run the GO enrichment tests. Use the raw (unadjusted) p-values and return the GO terms for the ontologies Biological Process (BP), Molecular Function (MF), and Cellular Component (CC). Perform a Fisher test on the significant features (\(\alpha < 0.05\)).

## [1] "## BP"
## [1] "## MF"
## [1] "## CC"

7.5 Serum: Association to physico-chemical parameters

Plot the t-values against the GRAVY score and the isoelectric point.

## t-values of the current contrast (column "t" of tT)
t <- tT[, "t"]

## GRAVY score
## one value per row name of tT, computed from the "sequence" column of the
## serum row data
rD <- rowData(serum)
gravy <- unlist(lapply(rD[rownames(tT), "sequence"], calculateGravyScore))

## scatter plot of t against the GRAVY score with a linear fit and the
## Spearman correlation printed by ggpubr::stat_cor
df <- data.frame(gravy = gravy, t = t)
ggplot(data = df, mapping = aes(x = gravy, y = t)) +
    geom_point(alpha = 0.3) +
    geom_smooth(method = "lm", se = FALSE) +
    ggpubr::stat_cor(method = "spearman") +
    theme_classic()
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 3 rows containing non-finite values (stat_smooth).
## Warning: Removed 3 rows containing non-finite values (stat_cor).
## Warning: Removed 3 rows containing missing values (geom_point).

cor.test(gravy, t, method = "spearman")
## Warning in cor.test.default(gravy, t, method = "spearman"): Cannot compute exact
## p-value with ties
## 
##  Spearman's rank correlation rho
## 
## data:  gravy and t
## S = 2201955, p-value = 0.9083
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
##         rho 
## 0.007518585
## isoelectric point
## one value per row name of tT, computed from the "sequence" column of rD
## with method = "IPC_protein"
iep <- unlist(lapply(rD[rownames(tT), "sequence"],
    calculateIsoelectricPoint, method = "IPC_protein"))

## scatter plot of t against the isoelectric point with a linear fit and the
## Spearman correlation printed by ggpubr::stat_cor
df <- data.frame(iep = iep, t = t)
ggplot(data = df, mapping = aes(x = iep, y = t)) +
    geom_point(alpha = 0.3) +
    geom_smooth(method = "lm", se = FALSE) +
    ggpubr::stat_cor(method = "spearman") +
    theme_classic()
## `geom_smooth()` using formula 'y ~ x'

cor.test(iep, t, method = "spearman")
## Warning in cor.test.default(iep, t, method = "spearman"): Cannot compute exact
## p-value with ties
## 
##  Spearman's rank correlation rho
## 
## data:  iep and t
## S = 2268889, p-value = 0.8145
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
##        rho 
## 0.01522211

Create violin plots for shared/unique features.

## gravy
## GRAVY scores of the features listed in the two unique-feature files (the
## first column of each file is used to index rowData(serum_all))
feat <- read.table(file = "unique_features_serum_Serum_AFA.txt",
    header = TRUE)
gravy_other <- lapply(rowData(serum_all)[feat[, 1], "sequence"],
    function(aa) calculateGravyScore(aa))
feat <- read.table(file = "unique_features_serum_Serum_TwoPhase.txt",
    header = TRUE)
gravy_tp <- lapply(rowData(serum_all)[feat[, 1], "sequence"],
    function(aa) calculateGravyScore(aa))

## create df
## keep the scores numeric from the start: binding them to the labels with
## cbind()/rbind() would turn them into strings that have to be parsed back,
## and would fail when one of the groups contains no features
scores <- list(shared = unlist(gravy), autoSP3 = unlist(gravy_other),
    "MTBE-SP3" = unlist(gravy_tp))
df <- data.frame(
    score = as.numeric(unlist(scores, use.names = FALSE)),
    type = factor(rep(names(scores), times = lengths(scores)),
        levels = c("autoSP3", "MTBE-SP3", "shared")))

## group pairs handed to plot_violin_barplot() via `comparisons`
comparisons <- list(c("autoSP3", "shared"), c("MTBE-SP3", "shared"),
    c("autoSP3", "MTBE-SP3"))

plot_violin_barplot(df = df, x = "type", y = "score", fill = "type",
    comparisons = comparisons, file = "protein_violin_gravy_serum.pdf",
    method = "wilcox.test", paired = FALSE)
## Warning: Removed 3 rows containing non-finite values (stat_ydensity).
## Warning: Removed 3 rows containing non-finite values (stat_boxplot).
## Warning: Removed 3 rows containing non-finite values (stat_signif).
## Warning: Removed 3 rows containing non-finite values (stat_ydensity).
## Warning: Removed 3 rows containing non-finite values (stat_boxplot).
## Warning: Removed 3 rows containing non-finite values (stat_signif).

## iep
## isoelectric points of the features listed in the two unique-feature files
## (the first column of each file is used to index rowData(serum_all));
## method = "IPC_protein" is passed explicitly because the shared values in
## `iep` were computed with it, so that all three groups use the same method
feat <- read.table(file = "unique_features_serum_Serum_AFA.txt",
    header = TRUE)
iep_other <- lapply(rowData(serum_all)[feat[, 1], "sequence"],
    function(aa) calculateIsoelectricPoint(aa, method = "IPC_protein"))
feat <- read.table(file = "unique_features_serum_Serum_TwoPhase.txt",
    header = TRUE)
iep_tp <- lapply(rowData(serum_all)[feat[, 1], "sequence"],
    function(aa) calculateIsoelectricPoint(aa, method = "IPC_protein"))

## create df
## keep the scores numeric from the start: binding them to the labels with
## cbind()/rbind() would turn them into strings that have to be parsed back,
## and would fail when one of the groups contains no features
scores <- list(shared = unlist(iep), autoSP3 = unlist(iep_other),
    "MTBE-SP3" = unlist(iep_tp))
df <- data.frame(
    score = as.numeric(unlist(scores, use.names = FALSE)),
    type = factor(rep(names(scores), times = lengths(scores)),
        levels = c("autoSP3", "MTBE-SP3", "shared")))

## group pairs handed to plot_violin_barplot() via `comparisons`
comparisons <- list(c("autoSP3", "shared"), c("MTBE-SP3", "shared"),
    c("autoSP3", "MTBE-SP3"))

plot_violin_barplot(df = df, x = "type", y = "score", fill = "type",
    comparisons = comparisons, file = "protein_violin_iep_serum.pdf",
    method = "wilcox.test", paired = FALSE)

8 CV plot for all conditions

## Coefficient of variation for the samples of one condition.
##
## se:         object providing assay(se) and se$condition
## features:   row index (feature names) to keep
## condition:  value of se$condition selecting the samples; also used as the
##             label in the returned data frame
## experiment: label written to the `experiment` column
##
## Returns a data frame with the columns protein, cv, condition, experiment.
cv_per_condition <- function(se, features, condition, experiment) {
    ## drop = FALSE keeps the matrix shape when only one feature or one
    ## sample is selected, so that t() always puts the samples in the rows
    a <- assay(se)[features, se$condition == condition, drop = FALSE]

    ## calculate cvs: MatrixQCvis::cv() is called on the transposed matrix and
    ## the first element of its return value is used
    cv_values <- MatrixQCvis::cv(t(a))[[1]]

    ## create data frame
    data.frame(protein = names(cv_values), cv = cv_values,
        condition = condition, experiment = experiment)
}

## cells
cv_cells_a <- cv_per_condition(cells, feat_cells, "Cells_AFA", "cells")
cv_cells_b <- cv_per_condition(cells, feat_cells, "Cells_TwoPhase_AFA",
    "cells")

## fresh-frozen
cv_ff_a <- cv_per_condition(ff, feat_ff, "powder_AFA", "fresh-frozen")
cv_ff_b <- cv_per_condition(ff, feat_ff, "powder_TwoPhase_AFA",
    "fresh-frozen")
cv_ff_c <- cv_per_condition(ff, feat_ff, "Tissue_bulk_AFA", "fresh-frozen")

## FFPE
cv_ffpe_a <- cv_per_condition(ffpe, feat_ffpe, "powder_AFA", "FFPE")
cv_ffpe_b <- cv_per_condition(ffpe, feat_ffpe, "powder_TwoPhase_AFA", "FFPE")
cv_ffpe_c <- cv_per_condition(ffpe, feat_ffpe, "FFPE_AFA", "FFPE")

## plasma (the label has to be "Plasma_AFA", identical to the condition name
## used for selecting the samples)
cv_plasma_a <- cv_per_condition(plasma, feat_plasma, "Plasma_TwoPhase_AFA",
    "plasma")
cv_plasma_b <- cv_per_condition(plasma, feat_plasma, "Plasma_AFA", "plasma")

## serum
cv_serum_a <- cv_per_condition(serum, feat_serum, "Serum_TwoPhase_AFA",
    "serum")
cv_serum_b <- cv_per_condition(serum, feat_serum, "Serum_AFA", "serum")

## rbind data frames
cv_df <- rbind(cv_cells_a, cv_cells_b, cv_ff_a, cv_ff_b, cv_ff_c, cv_ffpe_a,
    cv_ffpe_b, cv_ffpe_c, cv_plasma_a, cv_plasma_b, cv_serum_a, cv_serum_b)

## alphabetically ordered factor levels for the condition labels
cv_df$condition <- factor(cv_df$condition,
    levels = sort(unique(cv_df$condition)))

## boxplot of the coefficient of variation per condition, one facet per
## experiment, with vertical x-axis labels
g <- ggplot(data = cv_df, mapping = aes(x = condition, y = cv)) +
    geom_boxplot() +
    facet_wrap(~ experiment, scales = "free_x", nrow = 1) +
    labs(x = "", y = "coefficient of variation") +
    theme_classic() +
    theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))
g
## Warning: Removed 1713 rows containing non-finite values (stat_boxplot).

ggsave(filename = "coefficient_variation_protein_boxplot.pdf", plot = g)
## Saving 7 x 5 in image
## Warning: Removed 1713 rows containing non-finite values (stat_boxplot).
## violin plot of the coefficient of variation per condition, one facet per
## experiment, with vertical x-axis labels
g <- ggplot(data = cv_df, mapping = aes(x = condition, y = cv)) +
    geom_violin() +
    facet_wrap(~ experiment, scales = "free_x", nrow = 1) +
    labs(x = "", y = "coefficient of variation") +
    theme_classic() +
    theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))
g
## Warning: Removed 1713 rows containing non-finite values (stat_ydensity).

ggsave(filename = "coefficient_variation_protein_violinplot.pdf", plot = g)
## Saving 7 x 5 in image
## Warning: Removed 1713 rows containing non-finite values (stat_ydensity).
## combined half-violin and boxplot of the coefficient of variation per
## condition, filled by experiment and faceted by experiment; aes() is used
## instead of the deprecated aes_string()
g <- ggplot(data = cv_df, aes(x = condition, y = cv, fill = experiment)) +
    ## half violin, shifted to the right of the boxplot
    geom_flat_violin(position = position_nudge(x = .2, y = 0), alpha = .8,
        scale = "count") +
    theme_classic() +
    guides(color = "none") +
    scale_fill_manual(values = as.character(wes_palette("Darjeeling1",
        type = "discrete"))) +
    ## narrow boxplot without outlier points and without legend entry
    geom_boxplot(width = .1, show.legend = FALSE, outlier.shape = NA,
        alpha = 0.5) +
    xlab("") + ylab("coefficient of variation") +
    facet_wrap(~ experiment, scales = "free_x", nrow = 1) +
    theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))

g
## Warning: Removed 1713 rows containing non-finite values (stat_ydensity).
## Removed 1713 rows containing non-finite values (stat_boxplot).

ggsave(filename = "coefficient_variation_protein_violinboxplot.pdf", plot = g)
## Saving 7 x 5 in image
## Warning: Removed 1713 rows containing non-finite values (stat_ydensity).
## Removed 1713 rows containing non-finite values (stat_boxplot).

9 Barplot with overlap (%) for different data sets

## Saving 7 x 5 in image

  1. European Molecular Biology Laboratory, Meyerhofstrasse 1, 69117 Heidelberg, Germany↩︎